#!/usr/bin/python
# coding=utf-8

import requests
from bs4 import BeautifulSoup
import re
import time


def read_info(url, timeout=10):
    """Fetch one listing page and extract its basic details.

    Args:
        url: Address of the listing detail page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list ``[house_name, address, house_price, picture_urls, sex]``:
        ``picture_urls`` is a list with every ``src`` attribute value found
        inside the ``pho_show_big`` container (empty if that container is
        absent) and ``sex`` is the string ``'girl'`` or ``'boy'``.
        ``None`` is returned when the response status is not 200 or when the
        page does not contain the expected elements.

    Network errors raised by ``requests.get`` (including timeouts) are not
    caught here and propagate to the caller.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36'
                             ' (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'}
    r = requests.get(url, headers=headers, timeout=timeout)
    if r.status_code != 200:
        return None

    soup = BeautifulSoup(r.text, 'lxml')
    try:
        # soup.find() returns None when nothing matches; dereferencing that
        # raises AttributeError, which is treated like any other failed page.
        house_name = soup.find('h4').text
        address = soup.find('div', class_='pho_info').p.get('title')
        house_price = soup.find('div', class_='day_top clearfix').span.text
        sex = soup.find('div', 'js_box clearfix').h6.span
    except AttributeError:
        return None

    # str(None) contains no src attributes, so a missing gallery simply
    # yields an empty list.
    href_source = soup.find('div', class_='pho_show_big')
    picture_href = re.findall(r'src="(.*?)"', str(href_source))

    # The tag's markup is searched for the substring 'girl' (presumably a CSS
    # class marking the host's gender -- confirm against the page markup);
    # anything else is reported as 'boy'.
    gender = 'girl' if 'girl' in str(sex) else 'boy'

    return [house_name, address, house_price, picture_href, gender]


# Browser-like User-Agent sent with the search-page requests.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36'
                         ' (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'}


def main():
    """Crawl search result pages 1-5 and print the details of every listing.

    For each result page, every ``href`` ending in ``.html`` found inside the
    ``page_list`` container is passed to ``read_info`` and the returned value
    is printed (``None`` for pages ``read_info`` could not process). The
    crawler sleeps 3 seconds after each result page.
    """
    for i in range(1, 6):
        # A timeout keeps a stalled server from hanging the whole crawl.
        r = requests.get("http://bj.xiaozhu.com/search-duanzufang-p" + str(i) + '-0/',
                         headers=headers, timeout=10)
        soup = BeautifulSoup(r.text, 'lxml')
        href_source = soup.find_all('div', id='page_list')
        # Raw string avoids invalid-escape warnings; [^"]* keeps a match
        # inside a single attribute value even when several links share a line.
        href = re.findall(r'href="([^"]*\.html)"', str(href_source))
        for link in href:
            print(read_info(link))
        time.sleep(3)


# Only crawl when executed as a script, so that importing this module does
# not trigger network requests.
if __name__ == "__main__":
    main()